*************************************************
* Purpose -- Create events for MSCZ sample from
* CBP data to use in Table D3
************************************************
clear all
* RUN 00_path_master.do FIRST TO GET FILEPATHS

************************************************
* Loading different samples
************************************************

* MSCZ

* Work in a dedicated frame that holds the MSCZ sample
frame create msczes
cwf msczes

use "$clean/cbp/mscz_sample.dta", clear



************************************************
* Constructing events, post-periods, clean controls
************************************************

* The event study uses log(emp/pop) as the dependent variable, rather than
* log(emp) with population added as a separate control
generate lepop75 = log(emp75/pop)

* Append five placeholder rows carrying the years 1985-1989 so that fillin
* below creates those years for every pair
local n_before = _N
insobs 5
replace year = 1984 + _n - `n_before' if _n > `n_before'

* Rectangularize pair x year, then discard the rows that have no pair id
* (the placeholders and the combinations fillin built from them)
fillin pair_id_czstate year
drop if missing(pair_id_czstate)

* Back-fill the pair-level identifiers on the rows created by fillin; sorting
* on the identifier puts a non-missing value first within each pair
foreach idvar in statefips pair_id_num czone {
	bysort pair_id_czstate (`idvar'): replace `idvar' = `idvar'[1] if missing(`idvar')
}

* Attach the state-by-year minimum wage from Vaghul and Zipperer (VZ).
* The TWFE estimates from CBP use the data in JNR's replication package for
* replicability; the event studies take the minimum wage directly from VZ.
merge m:1 statefips year using "$raw/other/mw_state_annual_vz", ///
	keepusing(max_mw) keep(match) nogenerate

* Overwrite the minimum wage and its log with the VZ series
replace mw = max_mw
replace lmw = ln(mw)
drop max_mw

* Put the panel back in pair-year order
sort pair_id_czstate year


* Prepare MSCZ dataset 

* Create pre- and post-means and difference variables.
* The lead and lag operators used below are panel time-series operators, so
* the panel is declared here explicitly instead of depending on xtset
* settings that may or may not be stored with the input dataset.
xtset pair_id_czstate year

foreach var in lemp75 learn75 lepop75 lmw {
	* Leads 1-5 of the variable, one column per horizon
	forvalues i = 1/5 {
		gen `var'_f`i' = F`i'.`var'
	}
	
	* Pre-period value: the previous year
	gen `var'_pre = L.`var'

	* Post-period mean over the current year and the following years.
	* The full window (current year plus 5 leads) applies to 1998-2001 and
	* 2010-2011; later start years get a window shortened by one lead each,
	* down to the current year only for 1995, 2006 and 2016.
	egen `var'_post = rowmean(`var' `var'_f1 `var'_f2 `var'_f3 `var'_f4 `var'_f5) ///
	if inrange(year, 1998, 2001) | inrange(year, 2010, 2011)
	egen `var'_postb = rowmean(`var' `var'_f1 `var'_f2 `var'_f3 `var'_f4) if ///
	inlist(year, 2002, 2012)
	egen `var'_postc = rowmean(`var' `var'_f1 `var'_f2 `var'_f3) if ///
	inlist(year, 1992, 2003, 2013)
	egen `var'_postd = rowmean(`var' `var'_f1 `var'_f2) if ///
	inlist(year, 1993, 2004, 2014)
	egen `var'_poste = rowmean(`var' `var'_f1) ///
	if inlist(year, 1994, 2005, 2015)
	gen `var'_postf = `var' ///
	if inlist(year, 1995, 2006, 2016)
	
	* Fold the shortened-window means into the single post variable
	replace `var'_post = `var'_postb if inlist(year, 2002, 2012)
	replace `var'_post = `var'_postc if inlist(year, 1992, 2003, 2013)
	replace `var'_post = `var'_postd if inlist(year, 1993, 2004, 2014)
	replace `var'_post = `var'_poste if inlist(year, 1994, 2005, 2015)
	replace `var'_post = `var'_postf if inlist(year, 1995, 2006, 2016)

	drop `var'_postb `var'_postc `var'_postd `var'_poste `var'_postf
	
	* Post-period mean minus pre-period value
	gen `var'_dif = `var'_post - `var'_pre
}

* Log minimum wage in the last year of the post-window minus its value in the
* year before; the window length depends on the calendar year.
* Full window (5 leads):
gen max_lmw_dif = F5.lmw - L.lmw if inrange(year, 1998, 2001) | inrange(year, 2010, 2011)

* Windows of 4, 3, 2 and 1 leads, in that order
local h = 4
foreach yrs in "2002, 2012" "1992, 2003, 2013" "1993, 2004, 2014" "1994, 2005, 2015" {
	replace max_lmw_dif = F`h'.lmw - L.lmw if inlist(year, `yrs')
	local --h
}

* No leads available: current year only
replace max_lmw_dif = lmw - L.lmw if inlist(year, 1995, 2006, 2016)

* Create events

* Calendar years that can never host an event
local excl_years 1990, 1991, 1996, 1997, 2007, 2008, 2009

* Candidate event: the minimum wage is above the federal level and rose by at
* least 5 percent and at least 0.25 relative to the previous year
gen initial_event = (mw > fed_mw) & (mw >= L.mw + 0.25) & (mw >= 1.05*L.mw)

* Candidate events outside the excluded years
gen admissible_event = !inlist(year, `excl_years') & (initial_event == 1)

* Keep an admissible event only when none of the three preceding years had one
gen event_combined = (L3.admissible_event == 0) & (L2.admissible_event == 0) ///
	& (L1.admissible_event == 0) & (admissible_event == 1)

* Convert the indexation spreadsheet to a temporary Stata file without
* disturbing the data in memory
tempfile idx
preserve
	import excel using "$raw/other/indexed_mw.xlsx", firstrow clear
	save `idx'
restore

* Keep every row of the panel, matched or not.
* NOTE(review): the key is written as statefip while the rest of the file uses
* statefips; confirm which name the spreadsheet uses.
merge m:1 statefip using `idx', keep(master match) nogenerate

* 1 from index_start onwards, 0 otherwise (including when index_start is missing)
gen indexed_mw = (year >= index_start)


* Re-sort by pair and year so the lag and lead operators below line up
sort pair_id_czstate year, stable

* check1 flags an event whose previous year already saw the minimum wage rise
* (L.mw > L2.mw), provided that previous year is not an excluded year and was
* not yet indexed (L.indexed_mw == 0)
gen check1 = event_combined == 1 & L.mw > L2.mw & ///
~inlist(L.year,1990,1991,1996,1997,2007,2008,2009) & L.indexed_mw == 0

* Move each flagged event one year earlier: switch it off in the flagged year,
* switch it on in the year before, and carry the flag to that earlier year.
* The order of these three statements matters.
replace event_combined = 0 if check1 == 1
replace event_combined = 1 if F.check1 == 1
replace check1 = 1 if F.check1 == 1

* Second pass: same test, applied only to rows that hold an event and carry
* the check1 flag
gen check2 = event_combined == 1 & check1 == 1 & L.mw > L2.mw & ///
~inlist(L.year,1990,1991,1996,1997,2007,2008,2009) & L.indexed_mw == 0

* Shift the events flagged in the second pass one more year back
replace event_combined = 0 if check2 == 1
replace event_combined = 1 if F.check2 == 1
replace check2 = 1 if F.check2 == 1

* Third pass of the same test. check3 is only generated; nothing in this
* section acts on it.
* NOTE(review): consider asserting that check3 is 0 everywhere.
gen check3 = event_combined == 1 & check2 == 1 & L.mw > L2.mw & ///
~inlist(L.year,1990,1991,1996,1997,2007,2008,2009) & L.indexed_mw == 0

* No states are flagged by check3, so no further shifting is needed and these are our final "provisional" events

* One manual addition for Vermont (statefips 50): the 2015 increase is small
* and falls in an "indexed" period, but we know it was legislated by the state
replace event_combined = 1 if statefips == 50 & year == 2015

/* We manually remove events that overlap with the post-period of another
event. There are 2 such instances: Oregon 1998 and 2003, and Rhode Island 1999
and 2004. For each we have to decide whether the later year is kept only as
part of the earlier event, or stands as an event of its own.
We take the first option (which amounts to removing the later event) if the MW
increase in the post-period of the later event is smaller than the MW increase
in the post-period of the earlier event. Otherwise we would keep both events
and shorten the post-period of the earlier event by one year, so that the
later event no longer falls inside it.
In practice the first option applies in both cases: the later events are
dropped and are evaluated as part of the earlier event. */

* Oregon (statefips 41), 2003
replace event_combined = 0 if statefips == 41 & year == 2003
* Rhode Island (statefips 44), 2004
replace event_combined = 0 if statefips == 44 & year == 2004

* Create clean controls
* Any rise in the minimum wage that leaves it above the federal level
gen mw_increase = (mw > L.mw) & (mw > fed_mw)

* A pair-year is a clean control when there is no such rise in the three
* preceding years, in the current year, or in any year of its post-window.
* The condition starts with the lags and the current year and gains one lead
* per step, mirroring the post-window lengths.
local no_inc "L.mw_increase == 0 & L2.mw_increase == 0 & L3.mw_increase == 0 & mw_increase == 0"

gen clean_control = 0

local h = 0
foreach yrs in "1995, 2006, 2016" "1994, 2005, 2015" "1993, 2004, 2014" "1992, 2003, 2013" "2002, 2012" {
	if `h' > 0 {
		local no_inc "`no_inc' & F`h'.mw_increase == 0"
	}
	replace clean_control = 1 if inlist(year, `yrs') & `no_inc'
	local ++h
}

* Full window: all five leads must be free of increases as well
local no_inc "`no_inc' & F5.mw_increase == 0"
replace clean_control = 1 if (inrange(year, 1998, 2001) | inrange(year, 2010, 2011)) & `no_inc'


************************************************
* Creating frames for trend analysis
************************************************

* Declare the panel so the lag and lead operators below are defined
xtset pair_id_czstate year

* For every variable in the list this loop leaves behind:
*   <var>pre1-<var>pre3, <var>post1-<var>post5   lags and leads
*   <var>_longpre1990-<var>_longpre1998          level in that calendar year
*   <var>_dif1-<var>_dif9       level in 1990,...,1998 minus the t-1 level
*   <var>_dif1_2-<var>_dif9_2   level in 1990,...,1998 minus the t-3 level
*   <var>_dif10-<var>_dif18     level at t-3,...,t+5 minus the t-1 level
foreach var in lemp75 learn75 lepop75 lmw lemp_other lpop {
	* Lags 1-3 and leads 1-5
	forvalues i = 1/3 {
		gen `var'pre`i' = L`i'.`var'
	}
	
	forvalues i = 1/5 {
		gen `var'post`i' = F`i'.`var'
	}
	
	* Temporary event-time differences relative to t-1.
	* Slots 1-3 hold t-3, t-2 and t-1 (slot 3 is therefore zero)
	local j = 1
	forvalues i = 3(-1)1 {
		gen _`var'_dif`j' = `var'pre`i' - `var'pre1
		local ++j
	}
	
	* Slot 4 holds the current year
	gen _`var'_dif4 = `var' - `var'pre1
	
	* Slots 5-9 hold t+1,...,t+5
	local j = 5
	forvalues i = 1/5 {
		gen _`var'_dif`j' = `var'post`i' - `var'pre1
		local ++j
	}
	
	* Blank every slot in the excluded years
	forvalues i = 1/9 {
		replace _`var'_dif`i' = . if inlist(year,1990,1991,1996,1997,2007,2008,2009)
	}
	
	* Blank the leads that would fall after 1995, 2006 or 2016
	replace _`var'_dif9 = . if inrange(year,1992,1995) | inrange(year,2002,2006) | inrange(year,2012,2016)
	replace _`var'_dif8 = . if inrange(year,1992,1995) | inrange(year,2003,2006) | inrange(year,2013,2016)
	replace _`var'_dif7 = . if inrange(year,1993,1995) | inrange(year,2004,2006) | inrange(year,2014,2016)
	replace _`var'_dif6 = . if inrange(year,1994,1995) | inrange(year,2005,2006) | inrange(year,2015,2016)
	replace _`var'_dif5 = . if inlist(year,1995,2006,2016)
	
	* Level of the variable in each calendar year 1990-1998, copied to every
	* row of the pair. The helper variable is referenced by its full name so
	* the code does not depend on variable-name abbreviation being enabled.
	forvalues yr = 1990/1998 {
		
		gen _`var'_longpre`yr' = `var' if year == `yr'
		bys pair_id_czstate (year): egen `var'_longpre`yr' = mean(_`var'_longpre`yr')
		replace `var'_longpre`yr' = . if inlist(year,1990,1991,1996,1997,2007,2008,2009)
	
		drop _`var'_longpre`yr'
	}
	
	
	* Final slots 1-9: calendar-year levels relative to t-1 and, with the
	* _2 suffix, relative to t-3
	local j = 1
	forvalues i = 1990/1998 {
		g `var'_dif`j' = `var'_longpre`i' - `var'pre1
		g `var'_dif`j'_2 = `var'_longpre`i' - `var'pre3
		local ++j
	}
	
	* Final slots 10-18: the nine temporary event-time differences
	forvalues i = 1/9 {
		g `var'_dif`j' = _`var'_dif`i'
		local ++j
	}
	
	drop _`var'_dif*
}
* Discard the years before 2000 and write the event-study file
drop if year < 2000

save "$intermediate/cbp_msczes.dta", replace

